

import pandas as pd
from imblearn.over_sampling import SMOTE # 过抽样处理库SMOTE
from imblearn.under_sampling import RandomUnderSampler # 欠抽样处理库RandomUnderSampler


# Load the dataset; the first CSV column is used as the row index.
df = pd.read_csv('data/arff/output/LC.csv', index_col=0)
df.head()  # notebook-style preview; has no effect when run as a plain script

# Every column except the last is a feature; the last column is the target.
x, y = df.iloc[:, :-1], df.iloc[:, -1]

# Oversample with SMOTE, using the library's default settings.
model_smote = SMOTE()
x_smote_resampled, y_smote_resampled = model_smote.fit_resample(x, y)

# Rename the resampled target to 'label' and turn it into a one-column frame.
# Passing a named Series to pd.DataFrame(..., columns=['label']) selects by
# name instead of renaming, which yields an all-NaN column whenever the
# source column is not already called 'label'.
y_smote_resampled = pd.Series(y_smote_resampled, name='label').to_frame()

# Join the resampled features and the label column side by side.
smote_resampled = pd.concat([x_smote_resampled, y_smote_resampled], axis=1)

# Per-class row counts after resampling (displayed when run in a notebook).
groupby_data_smote = smote_resampled.groupby("label").count()
groupby_data_smote

# Persist the resampled data; the original script wrote the untouched `df`
# here, discarding the oversampling result.
smote_resampled.to_csv('data/arff/outcome/LC.csv', index=False)




